import pandas as pd
# Load the dataset; the path is resolved relative to the current working directory.
df = pd.read_csv('titanic.csv')

# Show the column summary. DataFrame.info() writes the report to stdout
# itself and returns None, so it is called directly: wrapping it in print()
# would emit an extra "None" line after the report.
df.info()

# Element-wise boolean masks: True where a cell is missing / is present.
print(df.isnull())
print(df.notna())

# Number of missing values in each column.
sum_ = df.isnull().sum()
print(sum_)

# m: number of samples (rows)
# n: number of features (columns)
m, n = df.shape

# Fraction of missing values in each column.
print(sum_ / m)

# Per-column boolean mask: True for columns with at least one missing value.
# The counts computed above are reused instead of rescanning the whole frame.
ix = sum_ > 0
print(ix)

# Labels of the columns that contain missing values, first as an Index and
# then as a plain array.
col = df.columns[ix]
print(col)
print(col.values)


#填充缺失值
from sklearn.impute import SimpleImputer
# strategy: which statistic is used as the fill value, one of
#   'mean'          - mean of the column
#   'median'        - median of the column
#   'most_frequent' - most frequent value of the column
sim = SimpleImputer(strategy='median')

# The imputer is fed a one-column frame and hands back a one-column 2-D
# result, so it is assigned through a one-column selection; this keeps the
# shapes aligned instead of relying on implicit (n, 1) -> column coercion.
df[['age']] = sim.fit_transform(df[['age']])

# NOTE(review): 3 is a hard-coded placeholder for missing 'deck' values. If
# 'deck' holds non-numeric codes this produces a mixed-type column — confirm
# the intended fill value against the data.
df['deck'] = df['deck'].fillna(3)

# DataFrame.info() prints the summary itself and returns None; calling it
# directly avoids the stray "None" line that print(df.info()) produces.
df.info()